rm(list = ls())

# # Uncomment and set the working directory with all Harvard database here
# setwd("")


# Preliminary requirements ------------------------------------------------

# libraries
library("pacman") # One library to load (and download if necessary) other packages
p_load(tidyverse,readxl,
       ggplot2,ggeffects,patchwork,
       ggspatial,ggpubr,prettymapr,sf,
       modelsummary,magrittr,kableExtra,
       estimatr,fixest, MatchIt,ivdesc,
       pwr,DeclareDesign,DesignLibrary,future.apply,knitr,
       lfe,BiocManager,coefplot,
       scales,corrplot)


# Data --------------------------------------------------------------------

#open individual-level dataset
df <- readRDS(file = "work_democracy_data.rds")

# subset and new dataframe only including those drafted as poll officers
df_mesa <- df |> filter(dataset==2)


# Appendix C. Descriptive data --------------------------------------------

##### Individual-level ####

###### Distribution of compliers #####
df_mesa1 <- df_mesa
# Status variable
df_mesa1$status <- NA
df_mesa1$status[df_mesa1$DOGC_acting==0 & df_mesa1$mesa33==0] <- "Compliers" #Not drafted and stayed non-drafted
df_mesa1$status[df_mesa1$DOGC_acting==1 & df_mesa1$mesa33==1] <- "Compliers" #Drafted and stayed Drafted
df_mesa1$status[df_mesa1$DOGC_acting==0 & df_mesa1$mesa33==1] <- "Always takers" #Non drafted but did go
df_mesa1$status[df_mesa1$DOGC_acting==1 & df_mesa1$mesa33==0] <- "Never takers" #Drafted and did not go

# Factor labelling
df_mesa1$DOGC_acting_f <- factor(df_mesa1$DOGC_acting,
                                 levels=c(0,1),
                                 labels=c("Draft: Alternate", "Draft: Acting"))

df_mesa1$mesa_1933_f <- factor(df_mesa1$mesa33,
                               levels=c(0,1),
                               labels=c("Not Poll Officer", "Poll Officer"))

df_mesa1$vot_1936_f <- factor(df_mesa1$vot_1936,
                              levels=c(0,1,2),
                              labels=c("Abstention", "Vote",NA))

# Remove individuals who were lost over time
df_mesa1 <- df_mesa1 |> filter(!is.na(vot_1936))

# Plot - preliminary version
ggplot(df_mesa1, aes(y = vot_1936_f, x = mesa_1933_f)) + 
  geom_point(aes(color = status , shape = female,), 
             position = position_jitter(height = .25, width = 0.25)) + 
  facet_wrap(vars(DOGC_acting_f)) +
  scale_x_discrete(labels = label_wrap(10)) +
  labs(color = "Type of person", shape = "Sex",
       x = NULL, y = "Turnout 1936") +
  scale_color_manual(values = c("lightgray", "darkgray", "black"))+
  # scale_color_viridis_d(option = "plasma", end = 0.85) +
  theme_bw()

# Equivalent and clearer plot 
df_mesa2 <- df_mesa1 |> 
  group_by(mesa_1933_f,status,female,DOGC_acting_f) |>
  summarise(turnout36 = mean(vot_1936, na.rm = T)*100,
            n = n())

fig_c1 <- df_mesa2 |> 
  ggplot(aes(y= turnout36, x = mesa_1933_f, fill = female)) +
  geom_col(position = "dodge")+
  geom_text(aes(label = round(turnout36,digits = 1)),vjust = 1.5, color = "white",position = position_dodge(.9),size = 2.5)+
  facet_grid(DOGC_acting_f ~ status)+ # DOGC_acting_f
  scale_x_discrete(labels = label_wrap(10)) +
  scale_fill_manual(values = c("darkgray", "black"))+
  labs(y = "% Turnout 1936", x = "", fill = "", #caption ="Numbers indicate # observations"
  ) +
  theme_bw() +
  theme(legend.position = "bottom")

fig_c1

rm(df_mesa1, df_mesa2)

###### Profiling compliers #####

# Create variables
df <- df |> 
  mutate(treatment = case_when(mesa33==0 ~ 0, # Effectively poll officer
                               mesa33==1 ~ 1, T ~NA),
         dogc = case_when(DOGC_acting==0 ~ 0, # Acting role, based on draft (Official Gazette: DOGC)
                          DOGC_acting==1 ~ 1, T ~NA),
         femalenum = case_when(female=="Men" ~ 0, 
                               female=="Women" ~ 1, T ~NA),
         young = case_when(age>39 ~ 0, # young
                           age<40 ~ 1, T ~NA),
         occupation = case_when(Hisco1!="Primary Sector" ~ 0, # occupation
                                Hisco1=="Primary Sector" ~ 1, T ~NA))

dat <- df |> 
  ungroup()  |>  
  select(treatment,dogc,femalenum,young,occupation) |>  
  gather(var,val,-treatment,-dogc)

res <- dat |> 
  group_by(var) |>  
  na.omit() |> 
  do(m=ivdesc(X=.$val,D=.$treatment,Z=.$dogc, bootn=500))

# Combine 
res <- unnest(res)

resA <- res  |> 
  select(group,pi,pi_se) |> 
  mutate(var="proportion") |>  
  rename(mu=pi,mu_se=pi_se)

resB <- res |>  
  select(var,group,mu,mu_se)

res <- bind_rows(resA,resB) |>  
  mutate(lo=mu+mu_se*qnorm(0.025), 
         hi=mu+mu_se*qnorm(0.975))


flabso <- c("proportion","femalenum", 
            "young","occupation")

glabso <- rev(c("sample", "co", "nt", "at"))

res <- ungroup(res) |> 
  mutate(var=factor(var, levels=flabso),
         group=factor(group, levels=glabso))

flabs <- as_labeller(
  c("proportion"="Proportion",
    "femalenum"="Women [0-1]",
    "young"="Youth [0,1]",
    "occupation"="Primary [0,1]"))

xlabs <- c("co"="Complier", "nt"="Never-taker", 
           "at"="Always-taker", sample="Sample")

# Graph for complier and noncomplier population
fig_c2 <- ggplot(res, aes(group,mu)) + geom_point() + 
  facet_wrap("var", scale="free_x", labeller=flabs, ncol=3, nrow=3) + 
  geom_linerange(aes(ymin=lo,ymax=hi)) +  
  scale_x_discrete(label=xlabs) +  
  scale_shape_identity() + 
  coord_flip() + ylab("Mean and 95% confidence interval") + 
  xlab("") + theme_minimal() + theme(text = element_text(size=8)) + 
  theme(panel.spacing = unit(0.8, "lines")) 

fig_c2

rm(dat,res,resA,resB,flabs,flabso,glabso,xlabs)

##### Municipality-level ####

###### Balance in contextual characteristics #####
df_muni <- df  |> 
  group_by(cmuni,samplewomen,muni) |> 
  summarise(femturnout33 = mean(women_voters_p),
            logindust = mean(logindust),
            logpop = mean(logpop),
            literacymuni = mean(literacymuni),
            femalerate = mean(femalerate),
            literacyfemale = mean(literacyfemale),
            femalemarried = mean(femalemarried),
            Left_33 = mean(Left_33),
            Right_33 = mean(Right_33),
            turnout_33= mean(turnout_33),
            ateneu_dummy= mean(ateneu_dummy),
            rally_lliga= mean(rally_lliga),
            cnt_dummy= mean(cnt_dummy),
            StrikePercent= mean(StrikePercent),
            popconcent= mean(popconcent),
            cath_ass_percent33= mean(cath_ass_percent33)) 

# No differences between municipalities with census tracts with only men and municipalities where women were drafted
# Only in census tracts in our sample
df_muni1 <- df_muni |>  
  mutate(Selected = ifelse(samplewomen=="No", NA,
                           ifelse(samplewomen=="Only Men","All Men","Women Poll Officers"))) |>  
  select(-samplewomen) |>  ungroup() |> 
  select(Selected,
         `Share of voters (women)` = femturnout33,
         `(Log) industry tax` = logindust,
         `(Log) Population` = logpop,
         `Literacy (municipality)` = literacymuni,
         `Women %` = femalerate,
         `% Women married` = femalemarried,
         `% Left vote (1933)` = Left_33,
         `% Right vote (1933)` = Right_33,
         `Turnout (1933)` = turnout_33,
         `Female Turnout (1933)` = femturnout33,
         `Ateneu` = ateneu_dummy,
         `Lliga Rallies` = rally_lliga,
         `CNT branch` = cnt_dummy,
         `Population Concentration` = popconcent,
         `% Catholic Associations` = cath_ass_percent33)

tab_c1 <- datasummary_balance(~Selected, 
                              data = df_muni1,
                              fmt = 2,stars = T)

tab_c1

rm(df_muni1)

###### Histograms of female share of turnout ######
# Female turnout by type of polling board
df_mesa_ct <- df_mesa |> 
  group_by(ctid,samplewomen) |> 
  summarise(NumFemPollWork = mean(NumFemPollWork),
            femturnout33 = mean(women_voters_p),
            per_women = mean(femalerate, na.rm = T))

per_w <- df_mesa |> 
  group_by(samplewomen) |> 
  summarise(per_women = mean(femalerate),
            sd_women = sd(femalerate))

fig_c3 <- df_mesa_ct |> 
  ggplot(aes(x=femturnout33))+
  geom_histogram(fill = "grey60")+
  scale_y_continuous(breaks = seq(1, 11, by = 2))+
  scale_x_continuous(breaks = seq(0, 60, by = 10))+
  facet_wrap(~samplewomen,ncol = 1)+
  geom_vline(data = per_w,aes(xintercept = per_women), colour = "black")+
  # geom_text(data = per_w,aes(x=per_women,y=4,
  #                            label=round(per_women,digits = 1)),
  #           hjust = -.1)+
  ylab("# census tracts") + xlab("Share of women voters (1933)")+
  theme(panel.background = element_rect(fill = "white", colour = "grey50"),
        panel.grid = element_line(color = "gray95"),
        strip.background =element_rect(fill="gray95"),
        legend.position = "bottom",
        text = element_text(size=16)
  )

fig_c3

rm(per_w,df_mesa_ct)

###### Correlation between population totals and population concentration ######
# Table to show that popconcent is not an indicator of rural-urban
df_IM <- df |> 
  group_by(muni) |> 
  mutate(pop = round(exp(logpop),digits = 0)) |> # calculate total population
  summarise(population = mean(pop),
            popconcent = mean(popconcent)) |> 
  ungroup() |> 
  mutate(logpop = log(population),
         pop_cat = cut(population,
                       breaks = c(0, 500, 1000, 50000),
                       labels = c("<500", "0.5-1k", ">1k")))

# Mean values of population concentration by population size
df_IM |> mutate(IM_PF_sd = popconcent) |> 
  group_by(pop_cat) |> 
  summarise(mean_IM_PF = mean(popconcent, na.rm=T),
            sd_IM_PF = sd(IM_PF_sd,na.rm = T))

# No correlation between (log) population and population concentration index
c_pop_impf<- cor.test(df_IM$logpop,df_IM$popconcent)

fig_c4 <- df_IM |> 
  ggplot(aes(x=popconcent, y = logpop)) + 
  geom_point(color="lightgray")+
  geom_smooth(method = "lm",color="black")+
  scale_color_grey()+
  labs(y = "(log) Population",
       x = "Population Concentration Index",
       caption = paste0("r = ",
                        round(c_pop_impf[["estimate"]],digits = 2),
                        "; p value = ",
                        round(c_pop_impf[["p.value"]],digits = 2)))+
  theme_minimal()

fig_c4

rm(df_IM,c_pop_impf)

###### Maps of mobilisation ######

#load map
mapgi33 <-  st_read("maps/Girona_munis_1933-35.shp") 

# Combine map with municipality-level information
mapgi_muni <- mapgi33 |> 
  rename(cmuni = codi_CED)|> 
  left_join(df_muni)


mapgi_muni <- mapgi_muni |> 
  mutate(# CNT as factor
    cnt_dummy = factor(cnt_dummy,
                       levels = c(0,1),
                       labels = c("No Anarchist", "Anarchist")),
    # Lliga rallies as factor
    rally_lliga = factor(rally_lliga,
                         levels = c(0,1),
                         labels = c("No Lliga Rally",
                                    "Lliga Rally")),
    # Ateneu presence as factor
    ateneu_dummy = factor(ateneu_dummy,
                          levels = c(0,1),
                          labels = c("No Ateneu",
                                     "Ateneu"))
  )

# CNT Map
mapgi_cnt <- ggplot(mapgi_muni)+
  geom_sf(aes(fill = cnt_dummy)) +
  theme(panel.border = element_blank(),
        panel.background = element_blank(),
        plot.background = element_blank(),
        axis.ticks = element_blank(),
        axis.text = element_blank(),
        axis.title = element_blank(),
        legend.position="bottom") +
  scale_fill_manual(values = c("No Anarchist" = 'lightgray',
                               "Anarchist" = 'black'),
                    na.value = "white") +
  labs(fill="")
mapgi_cnt

# Lliga

mapgi_lliga <- ggplot(mapgi_muni)+
  geom_sf(aes(fill = rally_lliga)) +
  theme(panel.border = element_blank(),
        panel.background = element_blank(),
        plot.background = element_blank(),
        axis.ticks = element_blank(),
        axis.text = element_blank(),
        axis.title = element_blank(),
        legend.position="bottom") +
  scale_fill_manual(values = c("No Lliga Rally" = 'lightgray',
                               "Lliga Rally" = 'black'),
                    na.value = "white") +
  labs(fill="")

mapgi_lliga

# Ateneu
mapgi_ateneu <- ggplot(mapgi_muni)+
  geom_sf(aes(fill = ateneu_dummy)) +
  theme(panel.border = element_blank(),
        panel.background = element_blank(),
        plot.background = element_blank(),
        axis.ticks = element_blank(),
        axis.text = element_blank(),
        axis.title = element_blank(),
        legend.position="bottom") +
  scale_fill_manual(values = c("No Ateneu" = 'lightgray',
                               "Ateneu" = 'black'),
                    na.value = "white") +
  labs(fill="")

mapgi_ateneu

# Catholic associations
mapgi_cath <- ggplot(mapgi_muni)+
  geom_sf(aes(fill = cath_ass_percent33)) +
  theme(panel.border = element_blank(),
        panel.background = element_blank(),
        plot.background = element_blank(),
        axis.ticks = element_blank(),
        axis.text = element_blank(),
        axis.title = element_blank(),
        legend.position="bottom") +
  scale_fill_continuous(low = 'gray95', high = 'black')+
  labs(fill="% Catholic\nAssociations")

mapgi_cath

# Combine
fig_c5 <- ggarrange(mapgi_cnt,mapgi_lliga,mapgi_ateneu,mapgi_cath)

fig_c5

# Remove all map objects
rm(list = grep("^mapgi", ls(), value = TRUE))

###### Correlation municipality-level covariates ######

# Correlations of municipality-level data
df_muni_cor <- df_muni |> group_by(cmuni) |> 
  summarise(`Anarchist` = mean(cnt_dummy),
            `Lliga (right)` = mean(rally_lliga),
            `Ateneu` = mean(ateneu_dummy),
            `Catholic Associations` = mean(cath_ass_percent33),
            `Taxes` = mean(logindust),
            `Turnout (1933)` = mean(turnout_33),
            `% Left (1933)` = mean(Left_33),
            `Population concentration` = mean(popconcent)) |> 
  select(-cmuni) |> 
  na.omit()

cormuni  <- cor(df_muni_cor)

#fig_c6
corrplot(cormuni, type = "lower",method = "ellipse",
         tl.col="black", # color variable
         tl.cex = .9,
         addCoef.col ='black')

rm(df_muni_cor,cormuni)


##### Province-level balance ######
df_prov <- read_xlsx("data_prov_level.xlsx") |> 
  # create log of taxes
  mutate(log_contrib_ind23 = log(contrib_ind23)) |> 
  # define variables to keep
  select(groups,
                `II Republic Acceptance 1931` = acep_rep31,
                `Turnout 1931` = part31,
                `Turnout 1933` = part33,
                `Turnout 1936` = part36,
                `Women in 1932 Census (%)` = p_dones32,
                `Women Married (%)` = marr_w_per,
                `Women Literate (%)` = lit_w_per,
                `Men Literate (%)` = lit_m_per,
                `1923 Industrial taxes (log)` = log_contrib_ind23,
                `Economic Inequality` = ineq30,
                `Jornaleros (%)` = jorn_censcamp
  )

tab_c2 <- datasummary(All(df_prov) ~ groups*(Mean+SD), #+ Mean + SD) ,
            data = df_prov,
            fmt = 2)

tab_c2

rm(df_prov)

# Appendix D - Full estimates ---------------------------------------------

# Model general, Main Table in Appendix to generate Figure 3
models <- list(
  
  ## Only Drafted Sample
  # Effects of effectively serving as polling officer. Treatment: mesa33)
  #Baseline
  "M1" = lm(vot_1936 ~ mesa33 + female + # female negative effects
              age + vot_1933, 
            data = df_mesa),
  #+ Locality controls
  "M2" = lm(vot_1936 ~ mesa33 + female + # female negative effects
              age + vot_1933 + logindust + logpop + literacymuni + femalemarried + Left_33 + Right_33,
            data = df_mesa),
  #Interaction x gender
  "M3" = lm(vot_1936 ~ mesa33*female + # female negative, interaction non-significant: the gap closes!
              age + vot_1933 + logindust + logpop + literacymuni + femalemarried + Left_33 + Right_33,
            data = df_mesa),
  
  # Effects of types of draft compliance (draft_complier_1933)
  "M4" = lm(vot_1936 ~ draft_complier_1933 + female + # female negative
              age +  vot_1933 + logindust + logpop + literacymuni + femalemarried + Left_33 + Right_33,
            data = df_mesa),
  
  #Types of draft compliance x gender
  "M5" = lm(vot_1936 ~ draft_complier_1933*female + # female negative & The gap closes for polling officers!
              age + logindust + logpop + literacymuni + femalemarried + Left_33 + Right_33,
            data = df_mesa),
  
  #Types of draft compliance x gender + 1933 vote
  "M6" = lm(vot_1936 ~ draft_complier_1933*female + # female negative & The gap closes for polling officers!
              age + vot_1933 +  #literate +
              logindust + logpop + literacymuni + femalemarried + Left_33 + Right_33,
            data = df_mesa)
)


cm <- c("femaleWomen" = "Woman",
        'mesa_19331' = 'Polling Officer (1933)',
        "mesa_19331:femaleWomen" = "Polling Officer x Woman",
        "draft_complier_1933Draft" = "Drafted", 
        "draft_complier_1933Draft:femaleWomen" = "Drafted x Woman",
        "draft_complier_1933Poll" = "Poll",
        "draft_complier_1933Poll:femaleWomen"="Poll x Woman",
        "draft_complier_1933Defier"="Defier",
        "draft_complier_1933Defier:femaleWomen"="Defier x Woman",
        "age"="Age","literate1"="Literate" ,
        "vot_1933"="Voted 1933",
        "logindust"="(Log) Industry","logpop"="(Log) Population",
        "literacymuni"="Literacy (mun)","femalemarried"="% Married Women",
        # "Left_33" ="% Left (33)","Right_33" = "% Right (33)",
        '(Intercept)' = 'Constant')

tab_d1 <- modelsummary(models,
             vcov = ~muni,
             stars = c('*' = .1, '**' = .05, '***' = .01),
             gof_omit = 'AIC|BIC|Log.Lik.|F|Std.Errors',
             notes = list('Standard errors clustered at the municipality-level'),
             coef_map=cm,
             escape = FALSE,
             output = "kableExtra") |> 
  add_header_above(c(" " = 1, "All drafted" = 6))

tab_d1


# Appendix E - Empirical Robustness ---------------------------------------

#### Power Analysis and Randomization Inference ####

# Power analysis
pwr.2p.test(h = ES.h(p1 = 0.85, p2 = 0.65), sig.level = 0.05, power = .80)

pwr.anova.test(k=3,f=.20,sig.level=.05,power=.8)   

pwr.f2.test(u = 2, f2 = 0.16/(1 - 0.16), sig.level = 0.05, power = 0.8)


# Declare Design
N <- 178 # sample size,
ate <- 16 # average treatment effect 
sd_1 <- 2 # SD 

our_design <- 
  declare_population(N = N, 
                     widgets_t1 = rnorm(n = N, 
                                        mean = 50,
                                        sd = sd_1) %>% round(., 2)) +
  declare_potential_outcomes(Y_Z_0 = widgets_t1, 
                             Y_Z_1 = widgets_t1 + ate) +
  declare_inquiry(ATE = mean(Y_Z_1 - Y_Z_0)) +
  declare_assignment(prob = .5, 
                     assignment_variable = Z, legacy=T) +
  declare_reveal(outcome_variables = Y) + 
  declare_estimator(Y ~ Z,
                    .method = lm_robust,
                    inquiry =  c("ATE"), 
                    label = "Treatment Effect"
  )

diagnosis1 <- our_design |> 
  diagnose_design(sims = 100)

diagnosis1 |> 
  get_diagnosands() |> 
  kable() |> 
  kableExtra::kable_styling()

fig_e1 <- get_simulations(diagnosis1)  |>  
  ggplot(aes(estimate)) + 
  geom_density() +
  geom_vline(xintercept = ate, col = "red") +
  labs(
    subtitle = "Real ATE shown in Red")

fig_e1

rm(diagnosis1,our_design,ate,N,sd_1,tab_d1)

# Appendix F - Instrumental Variable --------------------------------------

## PLOT IV: MARGINAL EFFECTS##

# First-stage
first_stage <- lm(mesa33  ~  DOGC_acting +
                    female + age + Hisco1+
                    logpop + logindust + literacymuni + Left_33 + turnout_33 + comarca,
                  data = df_mesa)

# Predicted values of being a member of the polling board
df_mesa$predmesa33<-predict(first_stage,newdata =df_mesa)


# Coefficient plots for interactions
second_stage1 <- felm(vot_1936 ~ # Dependent Variable
                        predmesa33*female + # Covariates of interest
                        age + Hisco1 +
                        logindust + logpop + literacymuni + # Controls
                        Left_33 + turnout_33 | # More Controls
                        comarca, # Fixed-Effects
                      data = df_mesa)

second_stage2 <- felm(vot_1936 ~ # Dependent Variable
                        predmesa33*female + # Covariates of interest
                        vot_1933 +  # Add 1933 vote as a control
                        age + Hisco1 +
                        logindust + logpop + literacymuni + # Controls
                        Left_33 + turnout_33 | # More Controls
                        comarca, # Fixed-Effects
                      data = df_mesa)

models1 <- list("Model 1" = second_stage1, "Model 2" = second_stage2)

# fig_f1
fig_f1 <- multiplot(models1,
          title = "IV: Second Stage Coefficient Plot",
          newNames = c("predmesa33"="Pred. Poll officer",
                       "femaleWomen"="Woman",
                       "predmesa33:femaleWomen"= "Pred. Poll Officer \n× Woman"),
          coefficients = c("predmesa33","femaleWomen","predmesa33:femaleWomen"),
          sort = "magnitude") +
  theme_bw() + 
  scale_color_manual(labels = c("W/o Controls", "Full Model"),values=c("darkgray","black"))+
  theme(legend.position="bottom",legend.title = element_blank())+
  ylab("") + xlab("Marginal Effects")

fig_f1

## Predicted likelihood
stage2 <- feols(vot_1936 ~ # Dependent Variable
                  predmesa33*female # Covariates of interest
                + comarca, data = df_mesa)
iv_graph <- ggpredict(stage2, c("predmesa33","female"), ci.lvl = 0.9)

fig_f2a <- ggplot(iv_graph, aes(x=x, y = predicted,color=group)) +
  # geom_point(size = 2) +
  geom_pointrange(aes(ymax = conf.high, ymin = conf.low), linewidth=.3,
                  position=position_dodge(width=0.05)) +
  theme(panel.background = element_rect(fill = "white", colour = "grey50"), 
        legend.position = "bottom",
        text = element_text(size=20)) +
  scale_color_manual(name="",
                     labels=c("Men",
                              "Women"), values=c("darkgray","black")) +
  scale_y_continuous( "Predicted Turnout 1936") +
  scale_x_continuous( "Predicted Likelihood to be Polling Officer",
                      limits = c(-.05,1.05),
                      breaks = c(0,.2,.4,.6,.8,1),
                      expand = c(.05,.05)) +
  labs(caption = "90% Confidence Intervals")

fig_f2a

# Binary predicted likelihood
df_mesa$predmesa33b[df_mesa$predmesa33<.5] <- 0
df_mesa$predmesa33b[df_mesa$predmesa33>.5] <- 1
df_mesa$predmesa33b <- factor(df_mesa$predmesa33b,
                              levels=c(0,1),
                              labels=c("No", "Yes"))

second_stage3 <- feols(vot_1936 ~ # Dependent Variable
                         predmesa33b*female + # Covariates of interest
                         # age + Hisco1 +
                         # logpop + logindust + literacymuni + # Controls
                         # Left_33 + turnout_33 +
                         comarca, data = df_mesa)

iv_graph <- ggpredict(second_stage3, c("predmesa33b","female"), ci.lvl = 0.9)
fig_f2b <- ggplot(iv_graph, aes(x=x, y = predicted,color=group)) +
  geom_pointrange(aes(ymax = conf.high, ymin = conf.low),linewidth=.3,
                  position=position_dodge(width=0.2)) +
  theme(panel.background = element_rect(fill = "white", colour = "grey50"), 
        legend.position = "bottom",
        text = element_text(size=20)) +
  scale_color_manual(name="",
                     labels=c("Men",
                              "Women"), values=c("darkgray","black")) +
  scale_y_continuous( "Predicted Turnout 1936") +
  xlab("Predicted Likelihood \nto be Polling Officer (Binary)")+
  labs(caption = "90% Confidence Intervals")

fig_f2b


rm(first_stage,iv_graph,models,models1,second_stage1,second_stage2,second_stage3,stage2)

###### Calculate ITT and CACE ######

#ITT
itt_model <- lm(vot_1936 ~ DOGC_acting*female + age + Hisco1 + logpop + logindust + literacymuni + # Controls
                  Left_33 + turnout_33 + comarca  , data = df_mesa)

tidy(itt_model)

ITT <- tidy(itt_model) |> 
  filter(term == "DOGC_acting:femaleWomen") |> print() |> 
  pull(estimate)
ITT #Being assigned to treatment increases average turnout by 0.08738516 percentage points (model with no controls)
#Model with controls (above) = ITT 0.2050827

#Proportion of compliers

compliers <- df_mesa  |>  
  group_by(mesa33, DOGC_acting) |>  
  summarise(n = n())|>  
  mutate(prop = n / sum(n))
#We have 86% of compliers. 

yest <- compliers$prop[4]# prop yes in treatment
yesc <- compliers$prop[2]# prop yes in treatment

# pi_c = prop yes in treatment - prop yes in control
pi_c <- yest - yesc
pi_c

CACE <- ITT / pi_c
CACE #Being on the polling station increased 0.2999075 percentage points the probability to turnout for compliers

model_2slsb <- iv_robust(vot_1936 ~ mesa33*female + age + Hisco1 + logpop + logindust + literacymuni + # Controls
                           Left_33 + turnout_33 + comarca  | DOGC_acting*female +  age + Hisco1 + logpop + logindust + literacymuni + # Controls
                           Left_33 + turnout_33 + comarca ,  data = df_mesa)
tidy(model_2slsb) #CACE among women 0.3435619964 


rm(CACE,cm,ITT,pi_c,yesc,yest,itt_model,model_2slsb,compliers)


# Appendix G - Further Analyses -------------------------------------------
##### Who are the defiers #####
### Women's likelihood to be defiers ###
df_mesa <- df_mesa  |> 
  mutate(defier33 = if_else(draft_complier_1933=="Defier",1,0))

models <- list(
  model_defier1 <- feols(defier33 ~
                           female + #negative effect
                           age +  # positive effect
                           Hisco1 | # owners (ref. cat), more likely to be defier
                           comarca, # comarca FE
                         data = df_mesa),
  model_defier2 <- feols(defier33 ~ female + age + Hisco1 +
                           logindust + logpop + literacymuni + femalemarried | comarca ,
                         data = df_mesa)
)

cm <- c("femaleWomen" = "Female",
        "age" = "Age",
        "logindust" = "(Log) Industry",
        "logpop" = "(Log) Population",
        "literacymuni" = "Literacy (mun)",
        "femalemarried" = "% Married Women",
        "(Intercept)" = "Constant")

tab_g1 <- modelsummary(models,
             vcov = ~cmuni,stars = TRUE,
             gof_omit = 'AIC|BIC|Log.Lik.|F|Std.Errors',
             notes = list('Control for individual occupation also included.',
                          'comarca FE',
                          'Standard errors clustered at the municipality-level'),
             coef_map=cm)

tab_g1

rm(model_defier1,model_defier2)

##### Matching #####

###### Matching procedure ######
#### Matching ####
# 
# # This is the code we used to match poll officers to individuals from a random sample
# # of voters in the Girona provice. Access to these data is restricted, but we are happy
# # to share access upon request
# 
# # variables to do matching
# df_match <- df |> 
#   mutate(dogc = ifelse(dataset=="2",1,0)) |> 
#   select(dogc, age, literacy,vot_1933,vot_1936,Hisco1,female,
#          logindust,literacymuni,logpop,literacyfemale,femalemarried, femalerate, popconcent,
#          comarca) |> 
#   mutate(comarca = as_factor(comarca)) |> 
#   na.omit()
# 
# # matching process
# mod_match <- matchit(dogc ~ age + literacy + female + Hisco1 +
#                        logindust + literacymuni + logpop + popconcent + 
#                        femalerate + femalemarried,
#                      method = "nearest", # nearest-matching
#                      data = df_match)  
# 
# # matching data to dataset
# df_match1 <- match.data(mod_match) |> 
#   select(c(individualid,weights,subclass))
# 
# # Join matched dataset to complete dataset 
# df_m <- df  |> 
#   left_join(df_match1)  |>  
#   filter(!is.na(weights)) # keep only if matched sample
# 
# rm(df_match,df_match1,mod_match)

# For the purposes of reproducibility the database includes
# those individuals who were matched based on the application
# of the previous code
df_m <- df


###### Balance of control group###### 
# Show balance of matched control group in most pre-treatment covariates, and imbalances in post-treatment
df_match <- df_m |> 
  # left_join(df_match1)  |> 
  # filter(!is.na(weights)) |>  na.omit() |>  
  ungroup() |> 
  mutate(Selected = ifelse(dataset==2, 'All Drafted', 'Matching Control')) |> 
  select(Selected,
         `Age`=age,
         `Sex` = female,
         `Literacy (Individual)`=literacy,
         `Occupation`=Hisco1,
         `Voted 1933`=vot_1933,
         `Voted 1936`=vot_1936,
         `(Log) industry (muni)` = logindust,
         `(Log) Population (muni)` = logpop,
         `Literacy (muni)` = literacymuni,
         `% Women married (muni)` = femalemarried)

tab_g2 <- datasummary_balance(~Selected, 
                    data = df_match,
                    fmt = 2,stars = T,
                    output = "kableExtra") |> 
  add_header_above(c(" "=2, "Extended Sample" = 4," "=2))

tab_g2

rm(df_match)

###### Matching: Main Table  ######
models <- list(
  
  ## Only Drafted Sample
  #Interaction x gender
  "M1" = lm(vot_1936 ~ mesa33*female + # female negative, interaction non-significant: the gap closes!
              age + vot_1933 + 
              logindust + logpop + literacymuni + femalemarried + Left_33 + Right_33,
            data = df_mesa),
  
  #Types of draft compliance x gender WITHOUT 1933 vote
  "M2" = lm(vot_1936 ~ draft_complier_1933*female + # female negative & The gap closes for polling officers!
              age +  
              logindust + logpop + literacymuni + femalemarried + Left_33 + Right_33,
            data = df_mesa),
  
  #Types of draft compliance x gender WITH 1933 vote
  "M3" = lm(vot_1936 ~ draft_complier_1933*female + # female negative & The gap closes for polling officers!
              age + vot_1933 +  
              logindust + logpop + literacymuni + femalemarried + Left_33 + Right_33,
            data = df_mesa),
  
  ## Enlarged dataset including a random sample of the Girona province
  
  # Serving as polling officer x Gender (large sample)
  "M4" = lm(vot_1936 ~ mesa33*female + # female negative, interaction non-significant: the gap closes!
              age + vot_1933 + 
              logindust + logpop + literacymuni + femalemarried + Left_33 + Right_33,
            data = df_m),
  #Types of draft compliance x gender (large sample)
  "M5" = lm(vot_1936 ~ draft_complier_1933*female + # female negative, interaction: gap for drafted but not for polling officers
              age + 
              logindust + logpop + literacymuni + femalemarried + Left_33 + Right_33,
            data = df_m), 
  #Types of draft compliance x gender (large sample) + vote record in 1933 (vot_1933)
  "M6" = lm(vot_1936 ~ draft_complier_1933*female + # most effects drop when including vote_1933 as a control
              age + vot_1933 +  #literate + 
              logindust + logpop + literacymuni + femalemarried + Left_33 + Right_33,
            data = df_m)
)


cm <- c("femaleWomen" = "Female",
        'mesa_19331' = 'Polling Officer (1933)',
        "mesa_19331:femaleWomen" = "Polling Officer x Female",
        "draft_complier_1933Draft" = "Alternate", 
        "draft_complier_1933Draft:femaleWomen" = "Alternate x Female",
        "draft_complier_1933Poll" = "Poll Officer",
        "draft_complier_1933Poll:femaleWomen"="Poll Officer x Female",
        "draft_complier_1933Defier"="Defier",
        "draft_complier_1933Defier:femaleWomen"="Defier x Female",
        "age"="Age","literate1"="Literate" ,
        "vot_1933"="Voted 1933",
        "logindust"="(Log) Industry","logpop"="(Log) Population",
        "literacymuni"="Literacy (mun)","femalemarried"="% Married Women",
        '(Intercept)' = 'Constant')

tab_g3 <- modelsummary(models, 
             vcov = ~muni,
             stars = c('*' = .1, '**' = .05, '***' = .01),
             gof_omit = 'AIC|BIC|Log.Lik.|F|Std.Errors',
             notes = list('Standard errors clustered at the municipality-level',
                          'Dependent Variable: Vote in 1936. Controls for left and right parties in 1933 included'
             ),
             coef_map=cm,
             output = "kableExtra") |> 
  add_header_above(c(" "=1, "Sample" = 3,"Sample+Control"=3)) 
  

tab_g3

###### Graph: Second Stage IV using matching ######
df_pred <- df_mesa  |> 
  select(individualid,predmesa33,predmesa33b) 

df_m <- df_m |> 
  left_join(df_pred) |> 
  mutate(predmesa33b = ifelse(is.na(predmesa33),1,predmesa33b))

df_m$predmesa33b <- factor(df_m$predmesa33b,
                           levels=c(1,2),
                           labels=c("No", "Yes"))

second_stage_m1 <- felm(vot_1936 ~ # Dependent Variable
                          predmesa33b*female + # Covariates of interest
                          age + Hisco1 +
                          logindust + logpop + literacymuni + # Controls
                          Left_33 + turnout_33 | # More Controls
                          comarca, # Fixed-Effects
                        data = df_m)

second_stage_m2 <- felm(vot_1936 ~ # Dependent Variable
                          predmesa33b*female + # Covariates of interest
                          vot_1933 + 
                          age + Hisco1 +
                          logindust + logpop + literacymuni + # Controls
                          Left_33 + turnout_33 | # More Controls
                          comarca, # Fixed-Effects
                        data = df_m)

models2 <- list("Model 1" = second_stage_m1, "Model 2" = second_stage_m2)


fig_g1 <- multiplot(models2, #color = "black",
                    title = "IV: Second Stage Coefficient Plot (matching)",
                    newNames = c("predmesa33bYes"="Pred. Poll officer",
                                 "femaleWomen"="Woman",
                                 "predmesa33bYes:femaleWomen"= "Pred. Poll Officer \n× Woman"),
                    coefficients = c("predmesa33bYes","femaleWomen","predmesa33bYes:femaleWomen"),
                    sort = "magnitude") +
  theme_bw() + 
  scale_color_manual(labels = c("W/o Controls", "Full Model"),values=c("darkgray","black"))+
  theme(legend.position="bottom",legend.title = element_blank())+
  ylab("") + xlab("Marginal Effects on Turnout (1936)")

fig_g1

rm(models,models2,second_stage_m1,second_stage_m2)

###### Baseline Results: Figure 3 Using vot_1933 as DV ######

# Top panel: polling officers vs. not polling officers
mpred <- lm(vot_1933 ~ mesa33*female + # female negative, interaction non-significant: the gap closes!
              age + logindust + logpop + literacymuni + femalemarried,
            data = df_m)

mydf <- ggpredict(mpred, c("mesa33","female"), ci.lvl = 0.95)
mydf2 <- ggpredict(mpred, c("mesa33","female"), ci.lvl = 0.90)

mydf2 <- mydf2 |> 
  rename(x2=x, pred2 = predicted,std.error2=std.error,
         conf.low2=conf.low,conf.high2=conf.high, group2=group)

mydf <- cbind(mydf,mydf2)
mydf$year <- 1933

# Model 36 
mpred <- lm(vot_1936 ~ mesa33*female + # most effects drop when including vote_1933 as a control
              age + logindust + logpop + literacymuni + femalemarried ,
            data = df_m)
# CI
mydf3 <- ggpredict(mpred, c("mesa33","female"), ci.lvl = 0.95)
mydf4 <- ggpredict(mpred, c("mesa33","female"), ci.lvl = 0.9)

mydf4 <- mydf4 |> 
  rename(x2=x, pred2 = predicted,std.error2=std.error,
         conf.low2=conf.low,conf.high2=conf.high, group2=group)

mydf3 <- cbind(mydf3,mydf4)
mydf3$year <- 1936

mydf_f <- rbind(mydf,mydf3)


mydf_f$x <- factor(mydf_f$x,
                   levels=c(0,1),
                   labels=c("No","Yes")) 

fig_g2a <- ggplot(mydf_f, aes(x=x, y = predicted,color=group,label = round(predicted,digits = 2))) +
  geom_point(size = 5,position=position_dodge(0.4),aes(shape=as.factor(year), color=group),)+  # aes(shape=group)
  geom_text(position = position_dodge(width = 1.2)) +
  geom_pointrange(aes(ymax = conf.high, ymin = conf.low, fill = as.factor(year), color=group),
                  linewidth=.3, position=position_dodge(width=0.4), fatten = 0, ) +
  geom_pointrange(aes(ymax = conf.high2, ymin = conf.low2, fill = as.factor(year), color=group),
                  linewidth=.75, position=position_dodge(width=0.4), fatten = 0, ) +
  geom_hline(yintercept=1, linetype="dashed", size = 1,
             color = "grey30", size=2, alpha = .1)+
  theme(panel.background = element_rect(fill = "white", colour = "grey50"), 
        legend.position = "bottom",
        text = element_text(size=20))+ 
  scale_shape_discrete(name="")+
  scale_fill_discrete(guide="none")+
  scale_color_manual(name="",
                     labels=c("Men", "Women"),
                     values=c("darkgray", "black")) +
  scale_y_continuous( "Predicted turnout") +
  scale_x_discrete( "Poll Officer in 1933") 

fig_g2a


# Bottom panel: polling officers vs. not polling officers
# Model  33
mpred <- lm(vot_1933 ~ draft_complier_1933*female + # most effects drop when including vote_1933 as a control
              age + logindust + logpop + literacymuni + femalemarried,
            data = df_m)
# CI
mydf <- ggpredict(mpred, c("draft_complier_1933","female"), ci.lvl = 0.95)
mydf2 <- ggpredict(mpred, c("draft_complier_1933","female"), ci.lvl = 0.9)

mydf2 <- mydf2 |> 
  rename(x2=x, pred2 = predicted,std.error2=std.error,
         conf.low2=conf.low,conf.high2=conf.high, group2=group)

mydf <- cbind(mydf,mydf2)
mydf$year <- 1933

# Model 36 
mpred <- lm(vot_1936 ~ draft_complier_1933*female + # most effects drop when including vote_1933 as a control
              age + logindust + logpop + literacymuni + femalemarried ,
            data = df_m)
# CI
mydf3 <- ggpredict(mpred, c("draft_complier_1933","female"), ci.lvl = 0.95)
mydf4 <- ggpredict(mpred, c("draft_complier_1933","female"), ci.lvl = 0.9)

mydf4 <- mydf4 |> 
  rename(x2=x, pred2 = predicted,std.error2=std.error,
         conf.low2=conf.low,conf.high2=conf.high, group2=group)

mydf3 <- cbind(mydf3,mydf4)
mydf3$year <- 1936

mydf_f <- rbind(mydf,mydf3)

# Change labels
levels(mydf_f$x) <- c("Control\n(matching)", "Alternate", "Poll \nOfficer", "Defier")
# levels(mydf$x) <- c("Draft", "Poll Officer", "Defier")

fig_g2b <- ggplot(mydf_f, aes(x=x, y = predicted,color=group,label = round(predicted,digits = 2))) +
  geom_point(size = 5,position=position_dodge(0.4),aes(shape=as.factor(year), color=group),)+  # aes(shape=group)
  geom_text(position = position_dodge(width = 1.4)) +
  geom_pointrange(aes(ymax = conf.high, ymin = conf.low, fill = as.factor(year), color=group),
                  linewidth=.3, position=position_dodge(width=0.4), fatten = 0, ) +
  geom_pointrange(aes(ymax = conf.high2, ymin = conf.low2, fill = as.factor(year), color=group),
                  linewidth=.75, position=position_dodge(width=0.4), fatten = 0, ) +
  geom_hline(yintercept=1, linetype="dashed", size = 1,
             color = "grey30", size=2, alpha = .1)+
  theme(panel.background = element_rect(fill = "white", colour = "grey50"), 
        legend.position = "bottom",
        text = element_text(size=20))+ 
  scale_shape_discrete(name="")+
  scale_fill_discrete(guide="none")+
  scale_color_manual(name="",
                     labels=c("Men", "Women"),
                     values=c("darkgray", "black")) +
  scale_y_continuous( "Predicted turnout") +
  scale_x_discrete( "") 

fig_g2b

rm(mpred,mydf,mydf_f,mydf2,mydf3,mydf4)

##### Placebo Test #####
### PLACEBO TEST: VOTING IN 1931 ELECTIONS ###

models <- list(
  "M1" = lm(vot_1931 ~ DOGC_acting + age + Hisco1 + vot_1933 +
              logindust + logpop + literacymuni + femalemarried,
            data = df_mesa),
  "M2" = lm(vot_1931 ~ mesa33 + age + Hisco1 + vot_1933 +
              logindust + logpop + literacymuni + femalemarried,
            data = df_mesa),
  "M3" = lm(vot_1931 ~ draft_complier_1933 + age + Hisco1 + vot_1933 +
              logindust + logpop + literacymuni + femalemarried, 
            data = df_mesa)
)
summary(models$M3)
cm <- c("DOGC_acting" = "Draft: Acting",
        'mesa_19331' = 'Acting Poll Officer \n(1933)',
        "draft_complier_1933Draft"="Draft", 
        "draft_complier_1933Poll"="Poll Officer","draft_complier_1933Defier"="Defier",
        "age"="Age",
        "vot_1933"="Voted 1933",
        "logindust"="(Log) Industry","logpop"="(Log) Population",
        "literacymuni"="Literacy (mun)","femalemarried"="% married women",
        '(Intercept)' = 'Constant')

rows <- tribble(~term,          ~M1,  ~M2,  ~M3,
                'Reference: Alternate', ' ',   ' ',  '-',
                '', ' ',   ' ',  '-')
attr(rows, 'position') <- c(5,6)

tab_g4 <- modelsummary(models,
             vcov = ~muni,
             stars = TRUE,
             gof_omit = 'AIC|BIC|Log.Lik.|F|Std.Errors|RMSE',
             notes = list('Standard errors clustered at the municipality-level'),
             coef_map=cm,
             add_rows = rows,
             output = "kableExtra") |> 
  add_header_above(c(" "=1, "Only Men" = 3)) 
  

tab_g4

rm(models,rows)

##### Heterogeneous Effects #####

###### Mobilization ######
### Table models with triple interactions
## Models ##
levels(df_mesa$predmesa33b) <- c("No Pred. Poll Officer","Pred. Poll Officer")
df_mesa <- df_mesa |> mutate(dummy= ifelse(cnt_dummy==0,0,1))

df_mesa$dummy <- factor(df_mesa$dummy,
                        levels=c(0,1),
                        labels=c("No", "Yes"))

reg_mob_cnt <-lm(vot_1936 ~ predmesa33b*female*dummy + age + vot_1933 +
                   logindust + logpop + literacymuni + femalemarried + parity,
                 data = df_mesa)

df_mesa <- df_mesa |> mutate(dummy= ifelse(rally_lliga==0,0,1))
df_mesa$dummy <- factor(df_mesa$dummy,
                        levels=c(0,1),
                        labels=c("No", "Yes"))

reg_mob_lliga <-lm(vot_1936 ~ predmesa33b*female*dummy + age + vot_1933 +
                     logindust + logpop + literacymuni + femalemarried + parity,
                   data = df_mesa)

df_mesa <- df_mesa |> mutate(dummy= ifelse(ateneu_dummy==0,0,1))
df_mesa$dummy <- factor(df_mesa$dummy,
                        levels=c(0,1),
                        labels=c("No", "Yes"))

reg_mob_ateneu <-lm(vot_1936 ~ predmesa33b*female*dummy + age + vot_1933 +
                      logindust + logpop + literacymuni + femalemarried + parity,
                    data = df_mesa)

m_cath <- mean(df_mesa$cath_ass_percent33) #mean value of catholic associations
df_mesa <- df_mesa |> mutate(dummy= ifelse(cath_ass_percent33<m_cath,0,1))
df_mesa$dummy <- factor(df_mesa$dummy,
                        levels=c(0,1),
                        labels=c("No", "Yes"))
reg_mob_cath <-lm(vot_1936 ~ predmesa33b*female*dummy + age + vot_1933 +
                    logindust + logpop + literacymuni + femalemarried + parity,
                  data = df_mesa)
summary(reg_mob_cath)
models <- list(reg_mob_cnt,reg_mob_lliga,reg_mob_ateneu,reg_mob_cath)

cm <- c("predmesa33bPred. Poll Officer" = "Pred. Poll Officer",
        "femaleWomen" = "Woman",
        "dummyYes" = "Mobilization",
        "predmesa33bPred. Poll Officer:femaleWomen" = "Pred. Poll Officer x Woman",
        "predmesa33bPred. Poll Officer:dummyYes" = "Pred. Poll Officer x Mobilization",
        "femaleWomen:dummyYes" = "Woman x Mobilization",
        "predmesa33bPred. Poll Officer:femaleWomen:dummyYes" = "Pred. Poll Officer x Woman x Mobilization",
        "age" = "Age",
        "vot_1933" = "Vote 1933",
        "logindust" = "(Log) Industry",
        "logpop" = "(Log) Population",
        "literacymuni" = "Literacy (mun)",
        "femalemarried" = "% Married Women",
        "parity" = "Pol. Competitiveness",
        "(Intercept)" = "Constant")

modelsummary(models, 
             output = "kableExtra",
             stars = TRUE,
             gof_omit = 'AIC|BIC|Log.Lik.|F|Std.Errors',
             notes = list('Control for individual occupation also included.',
                          'comarca FE',
                          'Cath. Assoc. is a dummy 0=below mean, 1=above mean percentage catholic organizations in a locality',
                          'Standard errors clustered at the municipality-level'),
             coef_map=cm)  |>  
  add_header_above(header = c(" ", "CNT", "Lliga", "Ateneu", "Cath. Assoc."))

### Figure with mobilization and treatment compliance

levels(df_mesa$draft_complier_1933) <- c("Control","Altern.","Poll\nOfficer","Defier")

# CNT
df_mesa$cnt_dummy <- factor(df_mesa$cnt_dummy,
                            levels = c(0,1),
                            labels = c("CNT = No", "CNT = Yes"))

regcnt <- lm(vot_1936 ~ draft_complier_1933*female*cnt_dummy + age + vot_1933 +
               logindust + logpop + literacymuni + femalemarried,
             data = df_mesa)

gcnt <- plot(ggpredict(regcnt, c("draft_complier_1933","female","cnt_dummy"), ci.lvl = 0.9), 
             show.title = F , colors =c("darkgray","black")) +
  ylab("Pred. Turnout (1936)") + xlab(element_blank())+
  theme(legend.title = element_blank()) +
  theme_minimal()
gcnt

# Lliga
df_mesa$rally_lliga <- factor(df_mesa$rally_lliga,
                              levels = c(0,1),
                              labels = c("Lliga = No", "Lliga = Yes"))
reglliga <- lm(vot_1936 ~ draft_complier_1933*female*rally_lliga + age + vot_1933 +
                 logindust + logpop + literacymuni + femalemarried,
               data = df_mesa) 

glliga <- plot(ggpredict(reglliga, c("draft_complier_1933","female","rally_lliga"), ci.lvl = 0.9), 
               show.title = F, colors =c("darkgray","black")) +
  ylab("Pred. Turnout (1936)") + xlab(element_blank())+
  # ylab(element_blank()) + xlab(element_blank())+
  theme(legend.title = element_blank()) + theme_minimal()
glliga

#interaccio amb ateneu
df_mesa$ateneu_dummy <- factor(df_mesa$ateneu_dummy,
                               levels = c(0,1),
                               labels = c("Ateneu = No", "Ateneu = Yes"))

regateneu <- lm(vot_1936 ~ draft_complier_1933*female*ateneu_dummy + age + vot_1933 +
                  logindust + logpop + literacymuni + femalemarried,
                data = df_mesa)

gateneu <- plot(ggpredict(regateneu, c("draft_complier_1933","female","ateneu_dummy"), ci.lvl = 0.9), 
                show.title = F, colors =c("darkgray","black")) +
  ylab("Pred. Turnout (1936)") + xlab(element_blank())+
  # ylab(element_blank()) + xlab(element_blank())+
  theme(legend.title = element_blank())+ theme_minimal()

gateneu

#interaccio amb associacions catoliques
regcath <- lm(vot_1936 ~ draft_complier_1933*female*cath_ass_percent33 + age + vot_1933 +
                logindust + logpop + literacymuni + femalemarried,
              data = df_mesa)

gcath <- plot(ggpredict(regcath, c("draft_complier_1933","female","cath_ass_percent33  [0,30]"),
                        ci.lvl = 0.9),show.title = F, colors =c("darkgray","black")) +
  ylab("Pred. Turnout (1936)") + xlab(element_blank())+
  # ylab(element_blank()) + xlab(element_blank())+
  theme(legend.title = element_blank()) + theme_minimal()

cath_lab <- as_labeller(c("cath_ass_percent33 = 0" = "0% Cath. Assoc.",
                          "cath_ass_percent33 = 30" = "30% Cath. Assoc."))

gcath<- gcath + facet_grid(cols = vars(facet),
                           labeller = labeller(facet = cath_lab))
gcath

# Combination graphs
fig_g3 <- gcnt + glliga + gateneu + gcath & 
  theme(legend.position = "bottom",legend.title = element_blank()) +
  plot_layout(guides = "collect") + labs(caption = "90% Confidence Intervals") 

fig_g3

# Remove objects
rm(gateneu,gcath,gcnt,glliga,cath_lab,m_cath)
rm(list = grep("^reg", ls(), value = TRUE))

###### Peer Effects ######
### Data adjustments
df_peer <- df_m
df_peer <- df_peer |> 
  mutate(FemPollWork = ifelse(NumFemPollWork>0,1,0))

df_peer$FemPollWork <- factor(df_peer$FemPollWork,
                              levels = c(0,1),
                              labels = c("All Male Poll Officers", "Women Poll Officer"))

df_peer <- df_peer  |> 
  filter(draft_complier_1933!="Defier")

levels(df_peer$draft_complier_1933) <- c("Control\n(matching)","Alternate","Poll\nOfficer","Defier")

df_peer$NumFemPollWork <- factor(df_peer$NumFemPollWork,
                                 levels = c(0,1,2),
                                 labels = c("All Male", "1 Woman", "2 Women"))

### Table G6
fempoll1 <-feols(vot_1936 ~ NumFemPollWork*female*draft_complier_1933 + age | cmuni,
                 data = df_peer)
tab_g6 <- summary(fempoll1)

tab_g6


### Peer: Graph 1

pred_fempoll <- ggpredict(fempoll1, c("draft_complier_1933","female","NumFemPollWork"), ci.lvl = 0.9,)
pred_fempoll[16,2] <- NA
pred_fempoll[16,4] <- NA
pred_fempoll[16,5] <- NA

fig_g4 <- plot(pred_fempoll , show.title = F) +
  theme_minimal() + scale_color_manual(name="", values=c("darkgray","black")) +
  ylab("Predicted Turnout (1936)") + xlab(element_blank()) + 
  labs(title = "", caption = '90% CI \nMunicipality FE')+
  theme(legend.position="bottom")

fig_g4

### Peer: Effects on all individuals based on female turnout
votants_d <- feols(vot_1936 ~ female*women_voters_p + mesa33 +
                     age + literacy + vot_1933 +  #
                     logindust + logpop + literacymuni + femalemarried + Left_33 + Right_33 ,
                   data = df,cluster = "cmuni")


summary(votants_d)

fig_g5 <- plot(ggpredict(votants_d, c("women_voters_p","female"), ci.lvl = 0.9), 
     show.title = F) + theme_minimal()+
  scale_color_manual(name="", values=c("darkgray","black")) +
  scale_fill_manual(name="", values=c("darkgray","black")) +
  ylab("Individual Predicted Turnout (1936)") +
  xlab("% Female voters in 1933. Municipality") + 
  theme(legend.title = element_blank(), legend.position="bottom")

fig_g5


###### Social Proximity ######

### Peer: Larger effects in places with more social concentration & woman poll officer
df_mesa <- df_mesa  |> 
  mutate(femvoters_d = ifelse(women_voters_p<40,0,1))

df_mesa$femvoters_d <- factor(df_mesa$femvoters_d,
                              levels = c(0,1),
                              labels = c("<40%\nFemale\nVoters", ">40%\nFemale\nVoters"))

levels(df_mesa$predmesa33b) <- c("Predicted Poll Officer\nNO", "Predicted Poll Officer\nYES")

votants_d1 <- feols(vot_1936 ~ female*femvoters_d*predmesa33b + age,
                    data = df_mesa)

summary(votants_d1)

fig_g6 <- plot(ggpredict(votants_d1, c("femvoters_d","female","predmesa33b"), ci.lvl = 0.9), 
     show.title = F) + theme_minimal() +
  ylab("Individual Predicted Turnout (1936)") + xlab(element_blank()) + #xlab("% Female voters in 1933. Municipality") + 
  scale_color_manual(name="", values=c("darkgray","black")) +
  labs(caption = '90% CI \nMunicipality FE')+
  theme(legend.position="bottom") 

fig_g6

### Alternative Graph
df_m <- df_m  |>
  mutate(FemPollWork = ifelse(NumFemPollWork>0,1,0))
df_m$FemPollWork <- factor(df_m$FemPollWork,
                           levels = c(0,1),
                           labels = c("All Male Poll Officers", "Women Poll Officer"))

# Graph including more controls
x3popcon1 <- lm(vot_1936 ~ FemPollWork*female*popconcent + age + vot_1933 + comarca,
                data = df_m)

summary(x3popcon1)

fig_g7 <- plot(ggpredict(x3popcon1, c("popconcent","female","FemPollWork"), ci.lvl = 0.9,), 
     show.title = F) + theme_minimal()+
  scale_color_manual(name="", values=c("darkgray","black")) +
  scale_fill_manual(name="", values=c("darkgray","black")) +
  ylab("Predicted Turnout (1936)") +
  xlab("Population Concentration") +
  theme(legend.title = element_blank(), legend.position="bottom")

fig_g7


# End of script -----------------------------------------------------------
rm(list = ls())
